---
title: "Witch trials: Spatio-Temporal Diffusion"
author: "Kerice Doten-Snitker, with Yuan Hsiao and Steve Pfaff"
date: "`r Sys.Date()`"
output: html_document
---

```{r setup, echo = FALSE, cache = FALSE}
knitr::opts_chunk$set(dev = c('png', 'pdf'), 
        fig.align = 'center', fig.height = 5, fig.width = 10, 
        fig.path=here::here("Witchtrial_diffusion", "out_figures/")) 

Sys.setlocale(category = "LC_ALL", locale = "en_US.UTF-8")
```
```{r load_packages, eval=TRUE, echo=FALSE}
# add packages with the pacman library, which installs if not installed
library(pacman)
# use the here package for improving replicability
p_load(here)
# for data manipulation
p_load(magrittr, plyr, dplyr, cubelyr, tidyverse, haven, zoo, reshape2, abind)
# for geographical analyses and calculations
p_load(geosphere, raster, gdistance, rgdal, rgeos, spatstat, ncdf4)
# for various model types and simulations
p_load(coxme, lme4, rstan, brms, rstanarm, splines2, tidybayes) # for Bayesian regression
p_load(ggeffects, sjstats)
# for exploration and visuals
p_load(ggplot2, ggthemes, skimr, corrplot, cowplot, bayesplot, sjPlot)
# for output
p_load(tinytex, pander, knitr, stargazer, texreg, gridExtra, kableExtra)

set.seed(1487)
options(scipen=999) # don't use scientific notation
```

```{r load_pander_methods, include= FALSE}
require(pander)
panderOptions('round', 2)
panderOptions('digits',7)
panderOptions('keep.trailing.zeros', TRUE)
panderOptions('table.style', 'multiline')
#panderOptions('table.split.table', Inf)
```
```{r load_ggplot_options, echo=FALSE}
mytheme <- theme_bw() + 
        theme(legend.position="right", legend.direction="vertical", 
        legend.background = element_rect(fill="transparent", colour=NA),
        legend.key = element_rect(fill="transparent", colour=NA),
        legend.key.size=unit(.33,"cm"),
        legend.title=element_text(size=12),
        legend.text=element_text(size=12),
        panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(),
        # panel.background = element_blank(),
        panel.border = element_rect(color = "black"),
        # text=element_text(family="sans"),
        plot.title=element_text(size=16,face="bold"),
        plot.subtitle=element_text(size=14,face="bold"),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))

```

The underlying data from Leeson & Russ is a list of trials by city, with the year and number of people tried. When it comes to regressions, the data is arranged for survival-style analyses where the observations are a panel of city-years that get censored at first witch trial.

```{r load_data, eval = TRUE, echo = FALSE}
trials_clean <- read.csv(here::here("Witchtrial_diffusion", "out_data", "trials.csv"), header=TRUE, stringsAsFactors=FALSE)
trials_laea <- read.csv(here::here("Witchtrial_diffusion", "out_data", "trials_laea.csv"), header=TRUE, stringsAsFactors=FALSE) #9848 obs
trials_net_laea <- read_csv(here::here("Witchtrial_diffusion", "out_data", "trials_net_laea.csv"))
battles_laea <- read.csv(here::here("Witchtrial_diffusion", "out_data", "battles_laea.csv"), header=TRUE, stringsAsFactors=FALSE)
malleus <- read.csv(here::here("Witchtrial_diffusion", "Malleus_USTC_Data-entry.csv"), header=TRUE, encoding = "latin1", stringsAsFactors=FALSE)
demonology <- read.csv(here::here("Witchtrial_diffusion", "Demonology_USTC_Data-entry.csv"), header=TRUE, encoding = "latin1", stringsAsFactors=FALSE)
```

## Exploration and descriptives

#### How many cases are we dealing with?
```{r basics, eval=TRUE, echo=TRUE}
length(unique(surv_sample$bhpr_runningnr)) # 553 cities; ~20 from BHPR don't have centrality
length(unique(surv_sample$year)) # 251 years, 1400-1650
nrow(surv_sample) #125163 obs
```

#### What are the temporal trends in witch trials and demonological printing?

Witch trials and editions of demonological books:
*Malleus maleficarum* in black lines; others in dashed grey lines

```{r Figs_histograms, eval=FALSE, echo=FALSE, fig.height = 4, fig.width = 6}

p.hist.1 <- ggplot(surv_sample[surv_sample$year<1651,], aes(x=year, y=firsttrial)) + geom_bar(stat="identity") +
  vline_at(malleus$Year, linetype = 1, size = 0.25, color="black") +
  vline_at(demonology$Year[demonology$ShortTitle!="Malleus maleficarum"], linetype = 2, size = 0.25, color="darkgrey") +
  ggplot2::labs(
    title = "",
    subtitle = "",
    x = "Year", y = "Count of cities holding\ntheir first witch trial"
  ) +
  scale_x_continuous(breaks = seq(1400,1650,50), limits = c(1390, 1660)) +
  scale_y_continuous(breaks = seq(0,9,1), limits = c(0, 10))

plot(p.hist.1 + mytheme)

tiff(here::here("Witchtrial_diffusion", "out_figures", "Figs1_hist-1.tiff"), width = 8, height = 4, units = 'in', res = 300)
plot(p.hist.1 + mytheme)
dev.off()

```


### Descriptive statistics
  
```{r tables_setup, eval=TRUE, echo=FALSE}
table_2_vars <- c("time", "firsttrial", "tried", "triedbin",
               "MM_sum10", "texts_sum10", "texts_not_sum10", 
                "diffMM_10", "difftexts_10", "difftexts_not_10", 
                "tried_sum10","difftried_10", "battles_sum10", "diffbattle_10", 
               "degree", "between", "eigen", 
               "temp_adj_lag", "prc_adj_lag", # climate
               "temp_shock", "temp_shock_hi", "temp_shock_lo",
               "prc_shock", "prc_shock_hi", "prc_shock_lo",
               "hanse_dollinger", "hanse_pagel", "freeimp_jacob", #gov capacity
               "dio_cheney", "archdio_cheney", "cath_cheney", # rel capacity
               "decade", 
               "year", "bhpr_runningnr", "grid_0")

table_2_names <- c("Time", "First trial", "People tried", "Trial held",
                   "Malleus editions, past 10 years", "Malleus editions, distance-weighted",
                   "All text editions, past 10 years", 
                        "All text editions, distance-weighted",
                   "All non-Malleus editions, past 10 years", 
                        "All non-Malleus editions, distance-weighted",
                   "Total persons tried, past 10 years", "Total persons tried, distance-weighted",
                   "Battles, past 10 years", "Battles, distance-weighted",
                   "Degree", "Betweennes", "Eigenvector",
                   "Relative temp.", "Relative precip.", 
                   "Temp. shock", "Temp. shock: high", "Temp. shock: low",
                   "Precip. shock", "Precip. shock: high", "Precip. shock: low",
                   "Hanse member - Dol.", "Hanse member", "Free or imperial", "Diocese", "Archdiocese", "Catholic seat",
                   "Decade","Year", "BHPR id", "High-coverage area"
                   )

```
```{r descriptives_setup, eval=TRUE, echo=FALSE}
# all possible variables...

descr_surv_net <- sapply(surv_sample, 
                    function(x) rbind(count = length(which(!is.na(x))), 
                                      minimum = min(as.numeric(x), na.rm=TRUE),
                                      maximum = max(as.numeric(x), na.rm=TRUE),
                                      mean = mean(as.numeric(x), na.rm=TRUE),
                                      median = median(as.numeric(x), na.rm=TRUE),
                                      stdev = sd(as.numeric(x), na.rm=TRUE)))
descr_surv_net <- round(t(descr_surv_net), 2)
colnames(descr_surv_net) <- c("n","Min","Max","Mean","Median","St.Dev")
rownames(descr_surv_net) <- dimnames(descr_surv_net)[[1]]

table_2 <- descr_surv_net[table_2_vars,]
rownames(table_2) <- table_2_names
write.csv(table_2, here::here("Witchtrial_diffusion", "out_tables", "descriptives_surv_table.csv"), row.names=TRUE)

```
  
 Table 2. Table of descriptives for unscaled data, censored time-series sample
 
```{r table_2_descriptives, eval=FALSE, results='asis', echo=FALSE}
stargazer(table_2, digits=2, type = "html", summary = FALSE, align=TRUE, out=here::here("Witchtrial_diffusion","out_tables","table_2_descriptives.htm"))
```
  

```{r contingency_tables, eval=FALSE, echo=FALSE}

cor(surv_sample$eigen, surv_sample$freeimp_jacob)
```


```{r proof_of_concept, eval=FALSE, echo=FALSE}

# Need a "life table"
surv_table <- surv_sample %>%
  dplyr::select(bhpr_runningnr, year, time, firsttrial, 
                MM_sum10, diffMM_10,
                # tried_sum10, difftried_10, battles_sum10, diffbattle_10,
                degree, between, eigen, grid_0) %>%
  arrange(year) %>%
  group_by(bhpr_runningnr) %>%
  summarize(censored = max(firsttrial),
            MMsum10atexit = last(MM_sum10),
            diffMM10atexit = last(diffMM_10),
            # triedsum10atexit = last(tried_sum10),
            # difftried10atexit = last(difftried_10),
            degree = first(degree), between = first(between), 
            eigen = first(eigen), 
            grid_0 = first(grid_0),
            time = max(time)) %>% 
  ungroup() %>%
  arrange(bhpr_runningnr)

summary(surv_table)
table(surv_table$censored)
surv_table %>% arrange(time)

# a "quick" survival analysis, as a sample with time-dependent covariates
# fit_model_1_surv <- coxme(Surv(time, firsttrial) ~ MM_sum10 + diffMM_10 +
#                                tried_sum10 + difftried_10 + 
#                                degree + between + eigen +
#                                # hanse_pagel +  # gov capacity
#                                # freeimp_jacob + cath_cheney + 
#                                # temp_adj_lag + prc_adj_lag + # climate
#                                grid_0 + # region fixed effects
#                                (1|bhpr_runningnr), 
#                            data = surv_sample)
# The values really bounce around depending on what's in the model...and therefore can't do the full version with all the other covariates. The results are just nonsensical, because there ends up being too much variation in too many dimensions across too few cases to have any sort of reasonable estimation power. Even a Bayesian cox model can't handle all the variables we want to include.
# fit_model_1_surv_b <-
#   brm(time|cens(firsttrial) ~ MM_sum10 + diffMM_10 +
#                                tried_sum10 + difftried_10 +
#                                battles_sum10 + diffbattle_10 +
#                                degree + between + eigen +
#                                # hanse_pagel +  # gov capacity
#                                # freeimp_jacob + cath_cheney +
#                                # temp_adj_lag + prc_adj_lag + # climate
#                                grid_0 + # region fixed effects
#                                (1|bhpr_runningnr),
#       family = cox(link = "log"), # survival
#       data = surv_sample,
#       prior = c(prior(student_t(7, 0, 2.5), class=b), # all vars, student's t
#   prior(student_t(4, 0, 1), class=Intercept)), # random fx, half-t,
#       chains = 3, iter = 2500, warmup=750, seed=1487,
#                        control = list(adapt_delta = 0.9),
#                        cores = parallel::detectCores())
```

### Capacity categories

How many cities in the sample ever were...

```{r capacity_explore, eval=FALSE, echo=FALSE}
panel_dataset_net %>%
  dplyr::select(city, hanse_dollinger) %>%
  distinct() %>%
  filter(hanse_dollinger==1) %>% nrow # 88
panel_dataset_net %>%
  dplyr::select(city, hanse_pagel) %>%
  distinct() %>%
  filter(hanse_pagel==1) %>% nrow # 84
panel_dataset_net %>%
  dplyr::select(city, freeimp_jacob) %>%
  distinct() %>%
  filter(freeimp_jacob==1) %>% nrow # 44
panel_dataset_net %>%
  dplyr::select(city, cath_cheney) %>%
  distinct() %>%
  filter(cath_cheney==1) %>% nrow # 91
```

### Confessions, religious competition, and trials

Should we filter battles to those within 100km? That would approximate L&R more. Could make a count of battles within 100km instead of battles weighted by distance. But are any of the battles are within 100km? Yes! But few, considering the temporal lag.

```{r battles_explore, eval=FALSE, echo=FALSE}

battle.distances <- surv_dataset_net %>%
  dplyr::select(bhpr_runningnr, year, tried, Battles_1) %>%
  mutate(battles100km_1 = rep(NA, dim(.)[1]))

for (i in 1: dim(battle.distances)[1]){
  if(!is.null(battle.distances$Battles_1[[i]][[1]])){ # if prior battles for t=1
    dist.subs <- dist_euc_matrix[battle.distances$newID[i], battle.distances$Battles_1[[i]]]
    battle.distances$battles100km_1[i] <- sum(ifelse(dist.subs<100,1,0), 
                                             na.rm=TRUE)
  } else {battle.distances$battles100km_1[i] <- 0} # if no prior for t=1, fill 0
}

battle.distances <- battle.distances %>%
  arrange(year) %>%
  group_by(bhpr_runningnr) %>% 
  mutate(battles100km_5 = rollapplyr(battles100km_1, 5, sum, 
                               na.rm=TRUE, fill=NA, partial=TRUE, align="right"),
         battles100km_10 = rollapplyr(battles100km_1, 10, sum, 
                                na.rm=TRUE, fill=NA, partial=TRUE, align="right")) %>%
  filter(year>=1400)

prop.table(table(ifelse(battle.distances$tried>1,1,0), 
                 ifelse(battle.distances$battles100km_10>1,1,0))) # 90% of city-years had no battles within 100km in the past 10 years; 86% of city-years with trial onset had no battles within 100km

battle.distances %>%
  group_by(bhpr_runningnr) %>%
  summarize(battles100km = ifelse(sum(battles100km_1)>0,1,0)) %>% 
  filter(battles100km>0) # 561 cities had a battle <100km away

# another way to get some some summary stats
city_battle_dist <- dist_euc_matrix %>%
  dplyr::select(starts_with("Btl")) %>%
  t() %>% as.data.frame() %>%
  dplyr::select(starts_with("BHPR_"))

# how many cities had a battle within 100km?
city_battle_dist %>%
  summarize(across(everything(), list(minimum = min))) %>%
  t() %>% as.data.frame() %>% summarize(local=sum(ifelse(.<100,1,0)/n()),
                                        mean=mean(V1), median=median(V1))
# 95.9%

# median distance?
city_battle_dist %>%
  summarize(across(everything(), list(median = median))) %>%
  t() %>% as.data.frame() %>% summarize(local=sum(ifelse(.<100,1,0)/n()),
                                        mean=mean(V1), median=median(V1))
# mean median distance was 611.26km, median median dist was 553.68

# How many city-battle pairs were of less than 100km in distance?
city_battle_dist %>%
  summarize(local=sum(ifelse(.<100,1,0)/(dim(.)[1]*dim(.)[2])))
# 2.8%
```

Is there a Protestant vs Catholic divide? produce tables with (1) the Cath/Prot split of the cities, prob by time periods, and (2) the Cath/Prot split of trials

```{r confession_explore, eval= TRUE, echo=FALSE}

rubin <- read_dta(here::here("Addl datasets", "Rubin_Printing_and_Protestants_Data-ReStat.dta"))
pfaff <- read_csv(here::here("Addl datasets","Kim and Pfaff_ASR_MasterFile_10232008.csv"), show_col_types = FALSE)

trials_net_prot <- rubin %>%
  dplyr::select(city, prot1560, prot1600) %>%
  merge(tradenet_all, by.y = "rubin_city", by.x = "city", all.x=TRUE, all.y=TRUE) %>%
  # all.y to keep multiple matches from trial records
  merge(trials_clean[!is.na(trials_clean$city),c("city","year","decade","tried","deaths")], by.x = "city.y", by.y = "city", all.x=TRUE, all.y=TRUE) %>% 
  filter(year>=1400&year<=1650|!is.na(bhpr_runningnr)) # BHPR no trials have not year

with(trials_net_prot %>% mutate(triedbin = ifelse(tried>0,1,0)), table(prot1600, triedbin, useNA="ifany")) # about even frequency of trials in Protestant vs Catholic cities, using the full trial data

panel_reform <- trials_net_prot %>%
    #  restrict to cities in the network
  filter(!is.na(bhpr_runningnr)) %>%
  replace_na(list(year=99)) %>% # no year because no trials
  group_by(bhpr_runningnr, kp_code, year) %>% 
  summarize(tried = sum(tried), deaths=sum(deaths), prot1530 = max(prot1530), prot1560 = max(prot1560), prot1600 = max(prot1600)) %>% ungroup() %>%
  merge(pfaff[, c("ID", "TOWN", "REFORMYR")], by.x = "kp_code", by.y = "ID", all.x=TRUE, all.y=FALSE) %>%
  mutate(
    prot1530_rkp = case_when(REFORMYR>=1500&REFORMYR<=1530 ~ 1, prot1530==1 ~ 1,
                             REFORMYR==-99 ~ 0, prot1530==0 ~ 0),
    prot1545_rkp = case_when(REFORMYR>=1500&REFORMYR<=1545 ~ 1, prot1530==1 ~ 1,
                                REFORMYR==-99 ~ 0, prot1530==0 ~ 0),
    prot1560_rkp = case_when(REFORMYR>=1500&REFORMYR<=1560 ~ 1, prot1560==1 ~ 1,
                                REFORMYR==-99 ~ 0, prot1560==0 ~ 0),
    prot1600_rkp = case_when(REFORMYR>=1500&REFORMYR<=1600 ~ 1, prot1600==1 ~ 1,
                                REFORMYR==-99 ~ 0, prot1600==0 ~ 0)) %>%
  group_by(bhpr_runningnr) %>%  arrange(year) %>%
  mutate(triedbin = ifelse(tried>0,1,0),
         firsttrialyr = min(year[triedbin==1])) %>%
  ungroup() %>%
  mutate(split_1519 = case_when(year>=1400&year<1519 ~ 0,
                                year>=1519&year<=1650 ~ 1),
         split_1530 = case_when(year>=1400&year<=1530 ~ 0,
                                year>1530&year<=1650 ~ 1),
         split_1545 = case_when(year>=1400&year<=1545 ~ 0,
                                year>1545&year<=1650 ~ 1),
         split_1560 = case_when(year>=1400&year<=1560 ~ 0,
                                year>1560&year<=1650 ~ 1),
         split_1600 = case_when(year>=1400&year<=1600 ~ 0,
                                year>1600&year<=1650 ~ 1),
         firsttrialyr = ifelse(is.infinite(firsttrialyr), NA, firsttrialyr),
         firsttrial = case_when(
           year==firsttrialyr ~ 1, year>firsttrialyr ~ 2, TRUE ~ 0))

city_reform_table <- panel_reform %>%
  group_by(bhpr_runningnr) %>%
  summarize(tried1400_1519 = sum(tried[split_1519==0], na.rm = T),
            tried1519_1560 = sum(tried[split_1519==1&split_1560==0], na.rm = T),
            tried1560_1600 = sum(tried[split_1560==1&split_1600==0], na.rm = T),
            tried1400_1650 = sum(tried, na.rm = T),
            first1400_1519 = sum(triedbin[split_1519==0&firsttrial==1], na.rm = T),
            first1519_1560 = sum(triedbin[split_1519==1&split_1560==0&firsttrial==1], na.rm = T),
            first1560_1600 = sum(triedbin[split_1560==1&split_1600==0&firsttrial==1], na.rm = T),
            first1400_1650 = sum(triedbin[firsttrial==1], na.rm = T)) %>%
  mutate(triedbin1400_1519 = ifelse(tried1400_1519>0,1,0),
         triedbin1519_1560 = ifelse(tried1519_1560>0,1,0),
         triedbin1560_1600 = ifelse(tried1560_1600>0,1,0),
         triedbin1400_1650 = ifelse(tried1400_1650>0,1,0))

city_reform_table <- panel_reform %>%
  dplyr::select(bhpr_runningnr, prot1530_rkp, prot1545_rkp, prot1560_rkp, prot1600_rkp) %>%
  distinct %>%
  merge(city_reform_table, by="bhpr_runningnr", all=TRUE)

reform_table_1530 <- city_reform_table %>%
  group_by(prot1530_rkp) %>%
  summarize(n_obs = n(),
            first1400_1519 = sum(first1400_1519),
            tried1400_1519 = sum(tried1400_1519),
            triedbin1400_1519 = sum(triedbin1400_1519)) %>%
  ungroup
reform_table_1530

reform_table_1560 <- city_reform_table %>%
  group_by(prot1560_rkp) %>%
  summarize(n_obs = n(),
            first1519_1560 = sum(first1519_1560),
            tried1519_1560 = sum(tried1519_1560),
            triedbin1519_1560 = sum(triedbin1519_1560)) %>%
  ungroup
reform_table_1560

reform_table_1600 <- city_reform_table %>%
  group_by(prot1600_rkp) %>%
  summarize(n_obs = n(),
            first1400_1519 = sum(first1400_1519),
            tried1400_1519 = sum(tried1400_1519),
            triedbin1400_1519 = sum(triedbin1400_1519),
            first1519_1560 = sum(first1519_1560),
            tried1519_1560 = sum(tried1519_1560),
            triedbin1519_1560 = sum(triedbin1519_1560),
            first1560_1600 = sum(first1560_1600),
            tried1560_1600 = sum(tried1560_1600),
            triedbin1560_1600 = sum(triedbin1560_1600),
            first1400_1650 = sum(first1400_1650),
            tried1400_1650 = sum(tried1400_1650),
            triedbin1400_1650 = sum(triedbin1400_1650)) %>%
  ungroup
reform_table_1600

```

We have 63 cities that turned Protestant by 1530. Splitting by future confessional status, Catholic and Protestant cities had about the same trial adoption rates prior to 1519. Protestant cities also held trials in fewer years.

***
## Regression analyses

```{r models_setup, eval= TRUE, echo=FALSE}

# Model 1: no trial vs initiate trials
model_1_censor <- as.formula(firsttrial ~ MM_sum10 + diffMM_10 +
                               tried_sum10 + difftried_10 + 
                               battles_sum10 + diffbattle_10 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

# Model 2: swap MM for all texts
model_2_censor <- as.formula(firsttrial ~ texts_sum10 + difftexts_10 +
                               tried_sum10 + difftried_10 + 
                               battles_sum10 + diffbattle_10 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

# Model 3: for all texts excluding MM
model_3_censor <- as.formula(firsttrial ~ texts_not_sum10 + difftexts_not_10 +
                               tried_sum10 + difftried_10 + 
                               battles_sum10 + diffbattle_10 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

# alternatives: sub climate "shock" binary (don't, too few); limit indep vars;
# one without regional dummies as a check

```
 
  We transformed the continuous variables by centering them on mean=0 and standardizing them so that 1 unit is 1 std dev. Including the climate vars, as already adjusted to local values, means something like normalizing the amount of variation that happens at a local level. These adjustments should improve the computation of the regressions.

 Priors: We set weakly informative priors (Gelman, Jakulin, Pittau, & Su, 2008). Continuous right-hand variables are scaled around their means and with 1 unit being 1 sd. For the betas, set the prior to student's t, centered on 0, with some volume in the tails, as recommended by STAN folks. The binary variables are much more likely to be 0 than 1, and other non-transformed variables are centered on 0, so keep 0 as the center for the student's t.
 We don't expect much city-specific variation, so we make the intercept prior have a smaller scale, which makes it more peaked/narrower, while still retaining volume in the tails. The intercepts will likely be negative because there are more observations without trials than with trials, but we don't know how negative.
 An alternative would be to use the L&R results as priors...if our outcome variables were the same and more of our right-hand variables were comparable to theirs.
 
 See https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations
 
### Calculate results

Use Bayesian GLM in brms.

 Model tuning: With model defaults, R-hats are >1.01 for variables with limited variation across obs (intercept, degree, grid_0), bulk ESS and tail ESS are low, and there are divergent transitions (not too many as %, but still). Adjust the step size via adapt_delta.

 Silently run the main models.
 
```{r models_run, echo = FALSE, eval= TRUE, include = FALSE}

rstan::rstan_options(auto_write = TRUE)

prior_model_1 <- c(
  prior(student_t(7, 0, 2.5), class=b), # all vars, student's t
  prior(student_t(4, 0, 1), class=Intercept)) # random fx, half-t

t_prior <- student_t(7, 0, 2.5) # all vars, student's t
half_t_prior <- student_t(4, 0, 1) # random fx

# Sys.time()
fit_model_1_censor <- brm(model_1_censor,
                       data = surv_sample_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
# Sys.time() # 1.75 hrs
saveRDS(fit_model_1_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_1_censor.rds"))
# fit_model_1_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_1_censor.rds"))

# Now run model specification that uses all texts, not just MM
# Sys.time()
fit_model_2_censor <- brm(model_2_censor,
                          data = surv_sample_scaled,
                          bernoulli(link = "logit"),
                          prior = prior_model_1,
                          chains = 4, iter = 4000, warmup=1000, seed=1487,
                          control = list(adapt_delta = 0.9),
                          cores = parallel::detectCores())
# Sys.time() # 2 hr
saveRDS(fit_model_2_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_2_censor.rds"))
# fit_model_2_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_2_censor.rds"))

# all non-MM texts
# Sys.time()
fit_model_3_censor <- brm(model_3_censor,
                      data = surv_sample_scaled,
                      bernoulli(link = "logit"),
                      prior = prior_model_1,
                      chains = 4, iter = 4000, warmup=1000, seed=1487,
                      control = list(adapt_delta = 0.9),
                      cores = parallel::detectCores())
# Sys.time() # 20 min
saveRDS(fit_model_3_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_3_censor.rds"))
# fit_model_3_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_3_censor.rds"))
```
  
   
### Diagnostics and Assessing Fit

  
```{r models_diagnostics, eval=FALSE, echo = FALSE, include = FALSE}

# # first, using Stan built-in posterior visualization applet
if (interactive()) launch_shinystan(fit_model_1_censor)

if (interactive()) launch_shinystan(fit_model_2_censor)

if (interactive()) launch_shinystan(fit_model_3_censor)
```
  
  Using the shinystan app, everything is okay...
  *Numbers of divergent transitions? as a proportion?* Check - none
  *Are the divergences clustered within the joint distributional space?* N/A
  *Are the distributions of the point estimates are relatively normal?* Relatively, yes

LOO cross-validation will take too long!
Use k-fold cross-validation estimations to detect bias in the results. Stratify by the outcome, firsttrial. There are only about 140 of those, so to keep decent n's across folds, choose 7 folds (~20 firsttrial==1 per subset)
 
```{r cv_checks, eval = TRUE, echo = FALSE}
folds_expl <- loo::kfold_split_stratified(K=7, x=surv_sample_scaled$firsttrial)
kfold_model_1_censor <- kfold(fit_model_1_censor,
                                    folds = folds_expl, K = 7,
                                    ndraws = 1000,
                                    cores = 3)
saveRDS(kfold_model_1_censor, here::here("Witchtrial_diffusion","out_data", "kfold_model_1_censor.rds"))
# Sys.time()
# kfold_model_1_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "kfold_model_1_censor.rds"))
kfold_model_2_censor <- kfold(fit_model_2_censor,
                              folds = folds_expl, K = 7,
                              ndraws = 1000,
                              cores = 3)
saveRDS(kfold_model_2_censor, here::here("Witchtrial_diffusion","out_data", "kfold_model_2_censor.rds"))
# Sys.time()
# kfold_model_2_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "kfold_model_2_censor.rds"))
kfold_model_3_censor <- kfold(fit_model_3_censor,
                              folds = folds_expl, K = 7,
                              ndraws = 1000, cores = 3)
saveRDS(kfold_model_3_censor, here::here("Witchtrial_diffusion","out_data", "kfold_model_3_censor.rds"))
# Sys.time()
# kfold_model_3_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "kfold_model_3_censor.rds"))

print(kfold_model_1_censor)
print(kfold_model_2_censor)
print(kfold_model_3_censor)

loo_compare(kfold_model_1_censor, 
            kfold_model_2_censor,
            kfold_model_3_censor)

control_params(fit_model_1_censor)[c("adapt_delta", "stepsize")]
control_params(fit_model_2_censor)[c("adapt_delta", "stepsize")]
control_params(fit_model_3_censor)[c("adapt_delta", "stepsize")]


```
  

### Main results: Tables

   
```{r models_output, eval=TRUE, echo = FALSE, include = FALSE}

var.order.m1 <- c("b_MM_sum10", "b_diffMM_10",
                  "b_tried_sum10", "b_difftried_10",
                  "b_degree", "b_between", "b_eigen",
                  "b_battles_sum10", "b_diffbattle_10", 
                  "b_temp_adj_lag", "b_prc_adj_lag",
                  "b_hanse_pagel", "b_freeimp_jacob", "b_cath_cheney",
                  "b_grid_0")
latex.label.order.m1 <- c("Malleus editions, past 10 years", 
                        "Malleus editions, distance-weighted",
                        "Total persons tried, past 10 years", "Total persons tried, distance-weighted", 
                        "Degree", "Betweenness", "Eigenvector",
                        "Battles, past 10 years", "Battles, distance-weighted",
                        "Relative temp.", "Relative precip.",
                        "Hanse member", "Free or imperial", "Catholic seat",
                        "Low coverage area")

var.order.m2 <- c("b_texts_sum10", "b_difftexts_10",
                  "b_tried_sum10", "b_difftried_10", 
                  "b_degree", "b_between", "b_eigen",
                  "b_battles_sum10", "b_diffbattle_10", 
                  "b_temp_adj_lag", "b_prc_adj_lag",
                  "b_hanse_pagel", "b_freeimp_jacob", "b_cath_cheney",
                  "b_grid_0")
latex.label.order.m2 <- c("All text editions, past 10 years", 
                        "All text editions, distance-weighted",
                        "Total persons tried, past 10 years", "Total persons tried, distance-weighted", 
                        "Degree", "Betweenness", "Eigenvector",
                        "Battles, past 10 years", "Battles, distance-weighted",
                        "Relative temp.", "Relative precip.",
                        "Hanse member", "Free or imperial", "Catholic seat",
                        "Low coverage area")

var.order.m3 <- c("b_texts_not_sum10", "b_difftexts_not_10",
                  "b_tried_sum10", "b_difftried_10", 
                  "b_degree", "b_between", "b_eigen",
                  "b_battles_sum10", "b_diffbattle_10", 
                  "b_temp_adj_lag", "b_prc_adj_lag",
                  "b_hanse_pagel", "b_freeimp_jacob", "b_cath_cheney",
                  "b_grid_0")
latex.label.order.m3 <- c("Non-Malleus editions, past 10 years", 
                        "Non-Malleus editions, distance-weighted",
                        "Total persons tried, past 10 years", "Total persons tried, distance-weighted", 
                        "Degree", "Betweenness", "Eigenvector",
                        "Battles, past 10 years", "Battles, distance-weighted",
                        "Relative temp.", "Relative precip.",
                        "Hanse member", "Free or imperial", "Catholic seat",
                        "Low coverage area")

# model output:

posterior_m1 <- as_draws_array(fit_model_1_censor)
m1_table_raw <- summarize_draws(posterior_m1, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

posterior_m2 <- as_draws_array(fit_model_2_censor)
m2_table_raw <- summarize_draws(posterior_m2, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

posterior_m3 <- as_draws_array(fit_model_3_censor)
m3_table_raw <- summarize_draws(posterior_m3, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m1_table <- m1_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m1, "b_Intercept")) %>%
  slice(match(c(var.order.m1, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m1, "Intercept")) %>%
  column_to_rownames(var = "variable")

m2_table <- m2_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m2, "b_Intercept")) %>%
  slice(match(c(var.order.m2, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m2, "Intercept")) %>%
  column_to_rownames(var = "variable")

m3_table <- m3_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m3, "b_Intercept")) %>%
  slice(match(c(var.order.m3, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m3, "Intercept")) %>%
  column_to_rownames(var = "variable")

write.csv(round(m1_table,2), here::here("Witchtrial_diffusion","out_tables","table_3_model1.csv"), row.names=TRUE)
write.csv(round(m2_table,2), here::here("Witchtrial_diffusion","out_tables","table_3_model2.csv"), row.names=TRUE)
write.csv(round(m3_table,2), here::here("Witchtrial_diffusion","out_tables","table_3_model3.csv"), row.names=TRUE)
```
  
Tables of results for each model:

**1. with *Malleus maleficarum* **
```{r table_3_model1, eval=TRUE, results='asis', echo=FALSE}
stargazer(m1_table, type = "html", summary = FALSE, align=TRUE, digits=2, out=here::here("Witchtrial_diffusion","out_tables","table_3_model1.htm"))
```
<br>
**2. with all demonological texts**
```{r table_3_model2, eval=TRUE, results='asis', echo=FALSE}
stargazer(m2_table, type = "html", summary = FALSE, align=TRUE, digits=2, out=here::here("Witchtrial_diffusion","out_tables","table_3_model2.htm"))
```
<br>
**3. with all texts excluding *Malleus***
```{r table_3_model3, eval=FALSE, results='asis', echo=FALSE}
stargazer(m3_table, type = "html", summary = FALSE, align=TRUE, digits=2, out=here::here("Witchtrial_diffusion","out_tables","table_3_model3.htm"))
```

### Main results: Figures

Visualize in interpretable, meaningful ways by presenting predicted probabilities, first differences, or relative risks for different counterfactuals.  

```{r Figs_areas_setup, eval = TRUE, echo = FALSE}

fig.var.order.m1 <- c("b_MM_sum10", "b_diffMM_10",
                  "b_tried_sum10", "b_difftried_10",
                  "b_degree", "b_eigen",
                  "b_battles_sum10", "b_diffbattle_10")
fig.label.order.m1 <- c("Malleus editions,\npast 10 years", 
                        "Malleus editions,\ndistance-weighted",
                        "Total persons tried,\npast 10 years", "Total persons tried,\ndistance-weighted", 
                        "Degree", "Eigenvector",
                        "Battles, past 10 years", "Battles, distance-weighted")

fig.var.order.m2 <- c("b_texts_sum10", "b_difftexts_10",
                  "b_tried_sum10", "b_difftried_10", 
                  "b_degree", "b_eigen",
                  "b_battles_sum10", "b_diffbattle_10")

fig.label.order.m2 <- c("All text editions,\npast 10 years", 
                        "All text editions,\ndistance-weighted",
                        "Total persons tried,\npast 10 years", "Total persons tried, \ndistance-weighted", 
                        "Degree", "Eigenvector",
                        "Battles, past 10 years", "Battles, distance-weighted")

fig.var.order.m3 <- c("b_texts_not_sum10", "b_difftexts_not_10",
                  "b_tried_sum10", "b_difftried_10", 
                  "b_degree", "b_eigen",
                  "b_battles_sum10", "b_diffbattle_10")

fig.label.order.m3 <- c("All non-Malleus editions,\npast 10 years", 
                        "All non-Malleus editions,\ndistance-weighted",
                        "Total persons tried,\npast 10 years", "Total persons tried, \ndistance-weighted", 
                        "Degree", "Eigenvector",
                        "Battles, past 10 years", "Battles, distance-weighted")
```


  First differences for change in probability of beginning trials (posterior means, with 50% and 89% intervals) for models 1 and 2
  
```{r Figs_areas_plots, eval = TRUE, echo = FALSE, fig.height = 10, fig.width = 13}

color_scheme_set("gray")

p.areas.m1 <- mcmc_areas(posterior_m1, pars = rev(c(fig.var.order.m1)),
                         prob = 0.5, # 50% intervals
                         prob_outer = 0.89, # 89%
                         point_est = "mean") +
  vline_at(0, linetype = 3, size = 0.25) +
  ggplot2::scale_y_discrete() +
  ggplot2::scale_y_discrete(labels=rev(fig.label.order.m1)) +
  ggplot2::labs(
    title = "", # Model 1 posterior distributions
    subtitle = "" # Posterior means, with 50% and 89% intervals
  )

# p.areas.m2 <- mcmc_areas(posterior_m2, pars = rev(c(fig.var.order.m2)),
#                             prob = 0.5, # 50% intervals
#                             prob_outer = 0.89, # 89%
#                             point_est = "mean") +
#   vline_at(0, linetype = 3, size = 0.25) +
#   ggplot2::scale_y_discrete() +
#   ggplot2::scale_y_discrete(labels=rev(fig.label.order.m2)) +
#   ggplot2::labs(
#     title = "", # Model 2 posterior distributions
#     subtitle = "" # Posterior means, with 50% and 89% intervals
#   )
# 
# p.areas.m3 <- mcmc_areas(posterior_m3, pars = rev(c(fig.var.order.m3)),
#                             prob = 0.5, # 50% intervals
#                             prob_outer = 0.89, # 89%
#                             point_est = "mean") +
#   vline_at(0, linetype = 3, size = 0.25) +
#   ggplot2::scale_y_discrete() +
#   ggplot2::scale_y_discrete(labels=rev(fig.label.order.m3)) +
#   ggplot2::labs(
#     title = "", # Model 3 posterior distributions
#     subtitle = "" # Posterior means, with 50% and 89% intervals
#   )

plot(p.areas.m1 + mytheme)

tiff(here::here("Witchtrial_diffusion", "out_figures", "Figs4_areas_plots-1.tiff"), width = 14, height = 8, units = 'in', res = 300)
plot(p.areas.m1 + mytheme)
dev.off()

# p.areas.m2 + mytheme

# p.areas.m3 + mytheme
```
  

## Robustness

### Issue 1: Endogeneity of printing the Malleus to the spread of witch trials

```{r robust_endogeneity, eval= TRUE, echo=FALSE}

# Endogeneity of printing and trials
trials_laea %>%
  filter(city %in% c("Cologne", "Frankfurt", "Lyon", "Metz", "Nuremberg", "Paris", "Speyer", "Strasbourg", "Venice")) %>%
  filter(year<1680) %>% dplyr::select(city, year) %>% unique() %>%
  group_by(city) %>%
  summarize(first = min(year), last = max(year), total = n())
# Of Malleus-printing cities, Cologne, Lyon, Metz, Nuremberg, Paris, and Venice had trials before 1680
panel_dataset_net %>%
  dplyr::select(city) %>%
  filter(city %in% c("Cologne", "Frankfurt", "Lyon", "Metz", "Nuremberg", "Paris", "Speyer", "Strasbourg", "Venice")) %>%
  unique()
# Of Malleus-printing cities, Cologne, Metz, Nuremberg, and Speyer are in the panel - and Speyer didn't have any recorded trials.
panel_dataset_net %>%
  filter(city %in% c("Cologne", "Frankfurt", "Lyon", "Metz", "Nuremberg", "Paris", "Speyer", "Strasbourg", "Venice")) %>%
  filter(triedbin == 1) %>%
  dplyr::select(year, city, tried, deaths) %>%
  write.csv(here::here("Witchtrial_diffusion", "out_tables", "printer_trials_table.csv"), row.names=FALSE)
# Three of the six printing cities that did have a trial (Cologne, Nuremberg, Metz) all had their first trial in the 1300s. Nuremberg had repeated trials (lots!). None of the printing cities in the panel had trials after 1489. Cologne, Metz, and Nuremberg had trials in 1487, 1488, and 1489. So if demand drove printing, we should have seen further trials in these cities - but we don't.

# Addressing endogeneity through model design: Malleus lagged on trials vs trials lagged on Malleus? There is some evidence for this based on data exploration (see histograms), potentially aligned with a temporal split in the sample. There is a gap in Malleus printing 1523-74. The sample can be split from 10 years after the last edition in the first wave, 1534. But it doesn't make sense to do malleus ~ f(trials) at the city level, because we already know that most cities don't print; it only makes sense to do this at the pooled, global level.

trials_net_matrix <- panel_dataset_net %>%
  group_by(year) %>%
  summarize(firsttrials_ann = sum(ifelse(firsttrial==1, 1, 0))) %>%
  # ignore firsttrial==0 (hasn't happened) and firsttrial==2 (already happened)
  ungroup() %>%
  # before lag, make sure to include all years
  complete(year=1300:1650) %>%
  replace_na(list(firsttrials_ann = 0)) %>%
  arrange(year) %>%
  mutate(firsttrials_lag1=lag(firsttrials_ann, n=1, default = NA)) %>% # lag by 1 year
  mutate(firsttrials_sum5 = rollapplyr(firsttrials_ann, width=list(-1:-5), sum, # 5 year moving sum
                                 na.rm=TRUE, fill=NA, align="right")) %>%
  mutate(firsttrials_sum10 = rollapplyr(firsttrials_ann, width=list(-1:-10), sum, # 10 year moving sum
                                  na.rm=TRUE, fill=NA, align="right")) %>%
  mutate(firsttrials_sum20 = rollapplyr(firsttrials_ann, width=list(-1:-20), sum, # 20 year moving sum
                                  na.rm=TRUE, fill=NA, align="right")) %>%
  mutate(firsttrials_sum30 = rollapplyr(firsttrials_ann, width=list(-1:-30), sum, # 30 year moving sum
                                  na.rm=TRUE, fill=NA, align="right")) %>%
  merge(trials_net_matrix, by = c("year"))

# one big table of annual values
annual_table <- merge(battles_matrix, malleus_matrix, by.x = "year", by.y="Year") %>%
  merge(., texts_matrix, by.x = "year", by.y="Year") %>%
  merge(., trials_net_matrix, by = c("year"))

# set up pooled, annualized regressions

model_pooled <- as.formula(firsttrials_ann ~ MM_sum10 +
                               tried_sum10 + 
                               battles_sum10)
model_pooled_reverse <- as.formula(MM_ann ~ firsttrials_sum10 +
                               tried_sum10 +
                               battles_sum10)

prior_model_pooled <- c(
  prior(student_t(7, 0, 2.5), class=b), # all vars, student's t
  prior(student_t(4, 0, 1), class=Intercept)) # random fx, half-t

var.order.m1rev <- c("b_firsttrials_sum10", "b_tried_sum10",
                  "b_battles_sum10")
latex.label.order.m1rev <- c("First trials, past 10 years", "Total persons tried, past 10 years", "Battles, past 10 years")

fit_model_pooled_1400_1534 <- brm(model_pooled,
                       data = annual_table[annual_table$year>1399&annual_table$year<1535,],
                       negbinomial(link = "log"),
                       prior = prior_model_pooled,
                       chains = 4, iter = 3000, warmup=500, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
saveRDS(fit_model_pooled_1400_1534, here::here("Witchtrial_diffusion","out_data", "fit_model_pooled_1400_1534.rds"))
# fit_model_pooled_1400_1534 <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_pooled_1400_1534.rds"))

fit_model_pooled_1535_1650 <- brm(model_pooled,
                       annual_table[annual_table$year>1534&annual_table$year<1651,],
                       negbinomial(link = "log"),
                       prior = prior_model_pooled,
                       chains = 4, iter = 3000, warmup=500, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
saveRDS(fit_model_pooled_1535_1650, here::here("Witchtrial_diffusion","out_data", "fit_model_pooled_1535_1650.rds"))
# fit_model_pooled_1535_1650 <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_pooled_1535_1650.rds"))

fit_model_pooledrev_1400_1534 <- brm(model_pooled_reverse,
                       data = annual_table[annual_table$year>1399&annual_table$year<1535,],
                       negbinomial(link = "log"),
                       prior = prior_model_pooled,
                       chains = 4, iter = 3000, warmup=500, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
saveRDS(fit_model_pooledrev_1400_1534, here::here("Witchtrial_diffusion","out_data", "fit_model_pooledrev_1400_1534.rds"))
# fit_model_pooledrev_1400_1534 <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_pooledrev_1400_1534.rds"))

posterior_m1rev_1400_1534 <- as_draws_array(fit_model_pooledrev_1400_1534)
m1rev_1400_1534_table_raw <- summarize_draws(posterior_m1rev_1400_1534, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m1rev_1400_1534_table <- m1rev_1400_1534_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m1rev, "b_Intercept")) %>%
  slice(match(c(var.order.m1rev, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m1rev, "Intercept")) %>%
  column_to_rownames(var = "variable")

write.csv(round(m1rev_1400_1534_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model1rev_1400_1534_table.csv"), row.names=TRUE)

fit_model_pooledrev_1535_1650 <- brm(model_pooled_reverse,
                       annual_table[annual_table$year>1534&annual_table$year<1680,],
                       negbinomial(link = "log"),
                       prior = prior_model_pooled,
                       chains = 4, iter = 3000, warmup=500, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
saveRDS(fit_model_pooledrev_1535_1650, here::here("Witchtrial_diffusion","out_data", "fit_model_pooledrev_1535_1650.rds"))
# fit_model_pooledrev_1535_1650 <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_pooledrev_1535_1650.rds"))

posterior_m1rev_1535_1650 <- as_draws_array(fit_model_pooledrev_1535_1650)
m1rev_1535_1650_table_raw <- summarize_draws(posterior_m1rev_1535_1650, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m1rev_1535_1650_table <- m1rev_1535_1650_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m1rev, "b_Intercept")) %>%
  slice(match(c(var.order.m1rev, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m1rev, "Intercept")) %>%
  column_to_rownames(var = "variable")

write.csv(round(m1rev_1535_1650_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model1rev_1535_1650_table.csv"), row.names=TRUE)

fit_model_pooled <- brm(model_pooled,
                       annual_table[annual_table$year>1399&annual_table$year<1680,],
                       negbinomial(link = "log"),
                       prior = prior_model_pooled,
                       chains = 4, iter = 3000, warmup=500, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
saveRDS(fit_model_pooled, here::here("Witchtrial_diffusion","out_data", "fit_model_pooled.rds"))
# fit_model_pooled <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_pooled.rds"))

fit_model_pooledrev <- brm(model_pooled_reverse,
                       annual_table[annual_table$year>1399&annual_table$year<1680,],
                       negbinomial(link = "log"),
                       prior = prior_model_pooled,
                       chains = 4, iter = 3000, warmup=500, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
saveRDS(fit_model_pooledrev, here::here("Witchtrial_diffusion","out_data", "fit_model_pooledrev.rds"))
# fit_model_pooledrev <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_pooledrev.rds"))

posterior_m1rev <- as_draws_array(fit_model_pooledrev)
m1rev_table_raw <- summarize_draws(posterior_m1rev, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m1rev_table <- m1rev_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m1rev, "b_Intercept")) %>%
  slice(match(c(var.order.m1rev, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m1rev, "Intercept")) %>%
  column_to_rownames(var = "variable")

write.csv(round(m1rev_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model1rev_table.csv"), row.names=TRUE)

```
  
  We run six models: first printing wave and second printing wave with first trial predicted by Malleus editions, then first printing wave and second printing wave with Malleus editions predicted by first trials, then all years for both versions -- all using 10 year lags.
  First trials are associated with prior editions of the Malleus only in the second printing wave.
  Prior trials are not associated with editions of the Malleus in any specification, and neither are confessional battles - even the estimated coefficients are basically 0!
  
  We could try to run something where trials in the vicinity are a covariate for editions in a printing city, but that doesn't quite make sense, as there are so many other factors for printing generally in a city, and then the specific printers and their portfolios...it would be a whole other project on printer decision-making.
  
  One thing we can do is put printing the Malleus in context, by normalizing it (well, relativizing it) to printing generally in the printing cities. We have collected relative volume of the Malleus, as the total number of editions in the printing city in the publishing year plus 9 preceding years divided by the total number of results for a USTC search in the printing city in the publishing year. This should be a liberal measure, since if it is lagged at all it would not account for the non-Malleus volume in those succeeding years. But what it does measure fairly clearly is the novelty of the Malleus when it appeared, and therefore its notability and potential staying power. If it's part of a cacophony of printing (smaller values), that should be different than if it is more visible among alternatives (larger values). These values can be subbed in for the edition counts. (We also have volume in that city in that year plus 9 preceding years (totaling one decade) (USTC10year), and this again for the specific printer).
  
```{r robust_normprinting, eval= TRUE, echo=FALSE}
# get relative printing data
ustc_printing <- read_csv(here::here("Witchtrial_diffusion", "USTC_Editions-by-city_Data-entry.csv")) %>% dplyr::select(-c(Year, Location, StdName, Printer))
# add to Malleus data
malleus2 <- merge(malleus, ustc_printing, by = c("USTC"))
# calculate rolling volumes
malleus_matrix2 <- malleus2 %>%
  dplyr::select(Year, StdName, volumeyear) %>%
  # When it's the same year in the same city, we've already aggregated! Matters for 1588 Frankfurt and 1574 and 1576 in Venice
  distinct() %>%
  group_by(Year) %>% 
  summarize(MMvol_ann = prod(volumeyear)) %>% # total edition volume in a year
  # before lag, make sure all years included
  complete(Year=1300:1650) %>% # max USTC year
  arrange(Year) %>%
  mutate(MMvol_lag1=lag(MMvol_ann, n=1, default = NA)) %>% # lag by 1 year
  # use product across year, because should never be >1 since it's a proportion
  mutate(MMvol_prod5 = rollapplyr(MMvol_ann, width=list(-1:-5), prod, # 5 year moving sum
                                na.rm=TRUE, fill=NA, align="right")) %>%
  mutate(MMvol_prod10 = rollapplyr(MMvol_ann, width=list(-1:-10), prod, # 10 year moving sum
                                 na.rm=TRUE, fill=NA, align="right")) %>%
  mutate(MMvol_prod20 = rollapplyr(MMvol_ann, width=list(-1:-20), prod, # 20 year moving sum
                                 na.rm=TRUE, fill=NA, align="right")) %>%
  mutate(across(MMvol_prod5:MMvol_prod20, ~ifelse(.==1,0,.))) %>%
  mutate(across(MMvol_ann:MMvol_prod20, ~.*100)) %>% # transform to percents
  replace_na(list(MMvol_ann = 0, MMvol_lag1 = 0))
# Add list columns of the volumes in time t and t-1:
malleus_matrix2 <- malleus2 %>%
  mutate(volumeyearpct = volumeyear*100) %>% # from proportion to percent
  dplyr::select(Year, volumeyearpct) %>% 
  group_by(Year) %>%
  nest(., MMVolumes_0 = c(volumeyearpct)) %>%
  full_join(malleus_matrix2, by="Year") %>% # join list to rest of Malleus data
  ungroup() %>%
  arrange(Year) %>%
  mutate(MMVolumes_1=lag(MMVolumes_0, n=1, default = list(NULL))) # lag by 1 year

# flatten from tibble to list
MMVolumes_1 <- vector("list", dim(malleus_matrix2)[1])
for (i in 1:dim(malleus_matrix2)[1]){
  flat.ids <- malleus_matrix2$MMVolumes_1[i][[1]]
  flat.ids <- flat.ids[!sapply(flat.ids,is.null)]
  if(length(flat.ids)!=0){
    MMVolumes_1[[i]] <- Reduce(rbind, flat.ids)
  } else {MMVolumes_1[[i]] <- list(NULL)}
}
malleus_matrix2$MMVolumes_1 <- MMVolumes_1

# merge MM volume calculations into the main data (1300-1650 CE)
panel_dataset_net <- panel_dataset_net %>%
  left_join(malleus_matrix2, by = c("year"="Year")) %>% # keep all rows of x (main df), add y (volumes)
  arrange(year, bhpr_runningnr)
# calculate spatially-adjusted values, as before with counts of editions
panel_dataset_net <- panel_dataset_net %>%
  mutate(diffMMvol_1 = rep(NA, dim(.)[1]))
for (i in 1: dim(panel_dataset_net)[1]){
  # if prior MM editions for t=1
  if(!is.null(panel_dataset_net$MMCopies_1[[i]][[1]])){
    dist.subs <- dist_euc_matrix[panel_dataset_net$newID[i], 
                                 panel_dataset_net$MMPrinters_1[[i]]]
    # see note at start of section on spatial diffusion calculations
    dist.subs[dist.subs<=0.1] <- 0.1
    panel_dataset_net$diffMMvol_1[i] <- 
      sum(panel_dataset_net$MMVolumes_1[i][[1]]/sqrt(dist.subs), 
                                             na.rm=TRUE)
  } else {panel_dataset_net$diffMMvol_1[i] <- 0} # if no prior for t=1, fill 0
}

panel_dataset_net <- panel_dataset_net %>%
  arrange(year) %>%
  group_by(bhpr_runningnr) %>% 
  # create moving sums for different windows
  mutate(diffMMvol_5 = rollapplyr(diffMMvol_1, 5, sum, 
                               na.rm=TRUE, fill=NA, partial=TRUE, align="right"),
         diffMMvol_10 = rollapplyr(diffMMvol_1, 10, sum, 
                                na.rm=TRUE, fill=NA, partial=TRUE, align="right"),
         diffMMvol_20 = rollapplyr(diffMMvol_1, 20, sum, 
                                na.rm=TRUE, fill=NA, partial=TRUE, align="right")) %>%
  ungroup() %>%
  arrange(bhpr_runningnr, year)

vol_vars <- c("MMvol_lag1", "MMvol_prod5", "MMvol_prod10", "MMvol_prod20",
              "diffMMvol_1", "diffMMvol_5", "diffMMvol_10", "diffMMvol_20")

# set up censored sample again
surv_dataset_net <- panel_dataset_net %>%
  filter(firsttrial==0|firsttrial==1) %>% # censor at first trial
  mutate(time=year-1400) # create time since first observation

surv_sample_vol <- surv_dataset_net[names(surv_dataset_net) %in% c(model_vars, vol_vars)]
surv_sample_vol <- surv_sample_vol[complete.cases(surv_sample_vol),]
surv_sample_vol <- filter(surv_sample_vol, year>=1400&year<1651)

surv_sample_vol_scaled <- surv_sample_vol %>%
  mutate_at(all_of(c(scale_vars, vol_vars)), ~scale(.) %>% as.vector)

# run model with volumes!
model_4_censor <- as.formula(firsttrial ~ MMvol_prod10 + diffMMvol_10 +
                               tried_sum10 + difftried_10 + 
                               battles_sum10 + diffbattle_10 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects
# Sys.time() #
# fit_model_4_censor <- brm(model_4_censor,
#                        data = surv_sample_vol_scaled,
#                        bernoulli(link = "logit"),
#                        prior = prior_model_1,
#                        chains = 4, iter = 4000, warmup=1000, seed=1487,
#                        control = list(adapt_delta = 0.9),
#                        cores = parallel::detectCores())
# Sys.time() # 55 min
# saveRDS(fit_model_4_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_4_censor.rds"))
fit_model_4_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_4_censor.rds"))

var.order.m4 <- c("b_MMvol_prod10", "b_diffMMvol_10",
                  "b_tried_sum10", "b_difftried_10",
                  "b_degree", "b_between", "b_eigen",
                  "b_battles_sum10", "b_diffbattle_10", 
                  "b_temp_adj_lag", "b_prc_adj_lag",
                  "b_hanse_pagel", "b_freeimp_jacob", "b_cath_cheney",
                  "b_grid_0")
latex.label.order.m4 <- c("Malleus as volume, past 10 years", 
                        "Malleus as volume, distance-weighted",
                        "Total persons tried, past 10 years", "Total persons tried, distance-weighted", 
                        "Degree", "Betweenness", "Eigenvector",
                        "Battles, past 10 years", "Battles, distance-weighted",
                        "Relative temp.", "Relative precip.",
                        "Hanse member", "Free or imperial", "Catholic seat",
                        "Low coverage area")

posterior_m4 <- as_draws_array(fit_model_4_censor)
m4_table_raw <- summarize_draws(posterior_m4, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m4_table <- m4_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m4, "b_Intercept")) %>%
  slice(match(c(var.order.m4, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m4, "Intercept")) %>%
  column_to_rownames(var = "variable")

write.csv(round(m4_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model4.csv"), row.names=TRUE)
```

Model 1, but with printing volume:
negative relationship with 10-year volume lag; none with distance-scaled
negative relationship with 10-year trial lag, but positive with distance-scaled
positive relationship with 10-year battle lag, but negative with distance-scaled
positive with degree and eigenvector
negative with Hanse, positive with free/imperial, none with catholic; none with climate

### Issues 2-5: Different specifications

```{r robust_specifications_timescope, eval= TRUE, echo=FALSE}
# event-history model (see "proof of concept" section above)

# model from 1450 instead of 1400 - only start with the advent of printing
# should the scaling be re-done to just within this sample? or just slice off obs from the main sample? results are the same
surv_sample_1450 <- filter(surv_sample, year>=1450&year<1651)

surv_sample_scaled_1450 <- surv_sample_1450 %>%
  mutate_at(all_of(scale_vars), ~scale(.) %>% as.vector)

# Sys.time()
fit_model_1_censor_1450 <- brm(model_1_censor,
                       data = surv_sample_scaled_1450,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
Sys.time() #
saveRDS(fit_model_1_censor_1450, here::here("Witchtrial_diffusion","out_data", "fit_model_1_censor_1450.rds"))
fit_model_1_censor_1450 <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_1_censor_1450.rds"))

posterior_m1_1450 <- as_draws_array(fit_model_1_censor_1450)
m1_1450_table_raw <- summarize_draws(posterior_m1_1450, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m1_1450_table <- m1_1450_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m1, "b_Intercept")) %>%
  slice(match(c(var.order.m1, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m1, "Intercept")) %>%
  column_to_rownames(var = "variable")

write.csv(round(m1_1450_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model1_1450.csv"), row.names=TRUE)

table(surv_sample$MM_sum10) # values 0 to 7
table(surv_sample_scaled$MM_sum10)
table(surv_sample_scaled_1450$MM_sum10) # 0 values reduced by about 25%

table(surv_sample_scaled$tried_sum10)
table(surv_sample_scaled_1450$tried_sum10) # 0 values reduced by about 66%
```

  no relationship with 10-year count lag; none with distance-scaled
negative relationship with 10-year trial lag, but positive with distance-scaled
positive relationship with 10-year battle lag, but negative with distance-scaled
only centrality relationship is positive with eigenvector
negative with Hanse, none with free/imperial or catholic; none with climate

```{r robust_specifications_lags, eval= TRUE, echo=FALSE}
# vary the temporal lag from 10 to 5 and to 20?
model_5_censor <- as.formula(firsttrial ~ MM_sum5 + diffMM_5 +
                               tried_sum5 + difftried_5 + 
                               battles_sum5 + diffbattle_5 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects
model_6_censor <- as.formula(firsttrial ~ MM_sum20 + diffMM_20 +
                               tried_sum20 + difftried_20 + 
                               battles_sum20 + diffbattle_20 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

model_vars_lag5 <- c("firsttrial",
               "battles_sum5", "tried_sum5", "MM_sum5",
               "diffbattle_5", "difftried_5", "diffMM_5",
               "degree", "between", "eigen",
               "hanse_pagel", "freeimp_jacob", "cath_cheney",
               "prc_adj_lag", "temp_adj_lag", "year", 
               "bhpr_runningnr", "grid_0")
surv_sample_lag5 <- surv_dataset_net[names(surv_dataset_net) %in% model_vars_lag5]
surv_sample_lag5 <- surv_sample_lag5[complete.cases(surv_sample_lag5),]
surv_sample_lag5 <- filter(surv_sample_lag5, year>=1400&year<1651)
scale_vars_lag5 <- c("battles_sum5", "tried_sum5", "MM_sum5", 
                "diffbattle_5", "difftried_5", "diffMM_5", 
                "prc_adj_lag", "temp_adj_lag") 
surv_sample_scaled_lag5 <- surv_sample_lag5 %>%
  mutate_at(all_of(scale_vars_lag5), ~scale(.) %>% as.vector)

# Sys.time()
fit_model_5_censor <- brm(model_5_censor,
                       data = surv_sample_scaled_lag5,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
# Sys.time() # 1 hr
saveRDS(fit_model_5_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_5_censor.rds"))
# fit_model_5_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_5_censor.rds"))

model_vars_lag20 <- c("firsttrial",
               "battles_sum20", "tried_sum20", "MM_sum20",
               "diffbattle_20", "difftried_20", "diffMM_20",
               "degree", "between", "eigen",
               "hanse_pagel", "freeimp_jacob", "cath_cheney",
               "prc_adj_lag", "temp_adj_lag", "year", 
               "bhpr_runningnr", "grid_0")
surv_sample_lag20 <- surv_dataset_net[names(surv_dataset_net) %in% model_vars_lag20]
surv_sample_lag20 <- surv_sample_lag20[complete.cases(surv_sample_lag20),]
surv_sample_lag20 <- filter(surv_sample_lag20, year>=1400&year<1651)
scale_vars_lag20 <- c("battles_sum20", "tried_sum20", "MM_sum20", 
                "diffbattle_20", "difftried_20", "diffMM_20", 
                "prc_adj_lag", "temp_adj_lag") 
surv_sample_scaled_lag20 <- surv_sample_lag20 %>%
  mutate_at(all_of(scale_vars_lag20), ~scale(.) %>% as.vector)

fit_model_6_censor <- brm(model_6_censor,
                       data = surv_sample_scaled_lag20,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
# Sys.time() # 1 hr
saveRDS(fit_model_6_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_6_censor.rds"))
# fit_model_6_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_6_censor.rds"))

var.order.m5 <- c("b_MM_sum5", "b_diffMM_5",
                  "b_tried_sum5", "b_difftried_5",
                  "b_degree", "b_between", "b_eigen",
                  "b_battles_sum5", "b_diffbattle_5", 
                  "b_temp_adj_lag", "b_prc_adj_lag",
                  "b_hanse_pagel", "b_freeimp_jacob", "b_cath_cheney",
                  "b_grid_0")
latex.label.order.m5 <- c("Malleus editions, past 5 years", 
                        "Malleus editions, distance-weighted",
                        "Total persons tried, past 5 years", "Total persons tried, distance-weighted", 
                        "Degree", "Betweenness", "Eigenvector",
                        "Battles, past 5 years", "Battles, distance-weighted",
                        "Relative temp.", "Relative precip.",
                        "Hanse member", "Free or imperial", "Catholic seat",
                        "Low coverage area")

var.order.m6 <- c("b_MM_sum20", "b_diffMM_20",
                  "b_tried_sum20", "b_difftried_20",
                  "b_degree", "b_between", "b_eigen",
                  "b_battles_sum20", "b_diffbattle_20", 
                  "b_temp_adj_lag", "b_prc_adj_lag",
                  "b_hanse_pagel", "b_freeimp_jacob", "b_cath_cheney",
                  "b_grid_0")
latex.label.order.m6 <- c("Malleus editions, past 20 years", 
                        "Malleus editions, distance-weighted",
                        "Total persons tried, past 20 years", "Total persons tried, distance-weighted", 
                        "Degree", "Betweenness", "Eigenvector",
                        "Battles, past 20 years", "Battles, distance-weighted",
                        "Relative temp.", "Relative precip.",
                        "Hanse member", "Free or imperial", "Catholic seat",
                        "Low coverage area")

posterior_m5 <- as_draws_array(fit_model_5_censor)
m5_table_raw <- summarize_draws(posterior_m5, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

posterior_m6 <- as_draws_array(fit_model_6_censor)
m6_table_raw <- summarize_draws(posterior_m6, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m5_table <- m5_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m5, "b_Intercept")) %>%
  slice(match(c(var.order.m5, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m5, "Intercept")) %>%
  column_to_rownames(var = "variable")

m6_table <- m6_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m6, "b_Intercept")) %>%
  slice(match(c(var.order.m6, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m6, "Intercept")) %>%
  column_to_rownames(var = "variable")

write.csv(round(m5_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model5.csv"), row.names=TRUE)

write.csv(round(m6_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model6.csv"), row.names=TRUE)

```

  no relationship with 5-year count lag; none with distance-scaled
negative relationship with 5-year trial lag, but positive with distance-scaled
positive relationship with 5-year battle lag, but negative with distance-scaled
positive centrality with degree and eigenvector
negative with Hanse, positive with free/imperial, none with catholic; none with climate

  positive relationship with 20-year count lag; negative with distance-scaled
negative relationship with 20-year trial lag, but positive with distance-scaled
positive relationship with 20-year battle lag, but negative with distance-scaled
positive with degree and eigenvector
negative with Hanse, none with free/imperial or catholic; none with climate
  


```{r robust_specifications_fixedfx, eval= TRUE, echo=FALSE}
## city fixed effects (no pooling) instead of random effects (partial pooling)

model_7_censor <- as.formula(firsttrial ~ MM_sum10 + diffMM_10 +
                               tried_sum10 + difftried_10 + 
                               battles_sum10 + diffbattle_10 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               as.factor(bhpr_runningnr))  # city fixed effects

prior_model_7 <- c(
  prior(student_t(7, 0, 2.5), class=b)) # all vars, student's t - including the city fixed effects

# Sys.time()
fit_model_7_censor <- brm(model_7_censor,
                       data = surv_sample_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_7,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
# Sys.time() # 12 hrs
saveRDS(fit_model_7_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_7_censor.rds"))
# fit_model_7_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_7_censor.rds"))

posterior_m7 <- as_draws_array(fit_model_7_censor)
m7_table_raw <- summarize_draws(posterior_m7, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m7_table <- m7_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m1)) %>%
  slice(match(c(var.order.m1), variable)) %>%
  mutate(variable=c(latex.label.order.m1)) %>%
  column_to_rownames(var = "variable")

write.csv(round(m7_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model7.csv"), row.names=TRUE)
```

  These results are the same for our key variables, but it takes 12x as long in run-time, and there's no more precision in the estimates. There's no interpretive benefit. And it actually doesn't match the data generating structure as well, so there's no principled reason to choose fixed effects. Additionally, fixed effects means choosing a referent city, and we have no principled reason to choose any particular city (in this case, it defaults to city 1, Aachen).


```{r robust_specifications_isolatelags, eval= TRUE, echo=FALSE}
# just temporal lags, then just spatiotemporal lags

model_8_censor <- as.formula(firsttrial ~ MM_sum10 +
                               tried_sum10 + 
                               battles_sum10 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

model_9_censor <- as.formula(firsttrial ~ diffMM_10 +
                               difftried_10 + 
                               diffbattle_10 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

# Sys.time()
fit_model_8_censor <- brm(model_8_censor,
                       data = surv_sample_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
# Sys.time() #  40 min
saveRDS(fit_model_8_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_8_censor.rds"))
# fit_model_8_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_8_censor.rds"))

# Sys.time()
fit_model_9_censor <- brm(model_9_censor,
                       data = surv_sample_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
# Sys.time() # 1 hr
saveRDS(fit_model_9_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_9_censor.rds"))
# fit_model_9_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_9_censor.rds"))

var.order.m8 <- c("b_MM_sum10", 
                  "b_tried_sum10", 
                  "b_degree", "b_between", "b_eigen",
                  "b_battles_sum10",  
                  "b_temp_adj_lag", "b_prc_adj_lag",
                  "b_hanse_pagel", "b_freeimp_jacob", "b_cath_cheney",
                  "b_grid_0")
latex.label.order.m8 <- c("Malleus editions, past 10 years",
                        "Total persons tried, past 10 years", 
                        "Degree", "Betweenness", "Eigenvector",
                        "Battles, past 10 years",
                        "Relative temp.", "Relative precip.",
                        "Hanse member", "Free or imperial", "Catholic seat",
                        "Low coverage area")

var.order.m9 <- c("b_diffMM_10",
                  "b_difftried_10",
                  "b_degree", "b_between", "b_eigen",
                  "b_diffbattle_10", 
                  "b_temp_adj_lag", "b_prc_adj_lag",
                  "b_hanse_pagel", "b_freeimp_jacob", "b_cath_cheney",
                  "b_grid_0")
latex.label.order.m9 <- c("Malleus editions, distance-weighted",
                        "Total persons tried, distance-weighted", 
                        "Degree", "Betweenness", "Eigenvector",
                        "Battles, distance-weighted",
                        "Relative temp.", "Relative precip.",
                        "Hanse member", "Free or imperial", "Catholic seat",
                        "Low coverage area")

posterior_m8 <- as_draws_array(fit_model_8_censor)
m8_table_raw <- summarize_draws(posterior_m8, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

posterior_m9 <- as_draws_array(fit_model_9_censor)
m9_table_raw <- summarize_draws(posterior_m9, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m8_table <- m8_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m8, "b_Intercept")) %>%
  slice(match(c(var.order.m8, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m8, "Intercept")) %>%
  column_to_rownames(var = "variable")

m9_table <- m9_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m9, "b_Intercept")) %>%
  slice(match(c(var.order.m9, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m9, "Intercept")) %>%
  column_to_rownames(var = "variable")

write.csv(round(m8_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model8.csv"), row.names=TRUE)

write.csv(round(m9_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model9.csv"), row.names=TRUE)
```

Temporal only:
  positive relationship with 10-year count lag
positive relationship with 10-year trial lag
no relationship with 10-year battle lag
positive with degree, positive with eigenvector
negative with Hanse, positive with free/imperial, none with catholic; none with climate

Spatiotemporal only:
  no relationship with with distance-scaled count
positive relationship with with distance-scaled trials
no relationship with distance-scaled battles
positive with degree and eigenvector
negative with Hanse, positive with free/imperial, none with catholic; none with climate

```{r robust_specifications_isolatecentrality, eval= TRUE, echo=FALSE}
# Are the centrality measures correlated and adding collinearities to the models?

surv_sample %>%
  dplyr::select(bhpr_runningnr, degree, between, eigen) %>%
  distinct() %>%
  dplyr::select(-bhpr_runningnr) %>%
  cor()
# relatively correlated - degree/betweenness at .71, degree/eigenvector at .58, betweenness/eigenvector at .34

# isolate the different centrality measures

model_10_censor <- as.formula(firsttrial ~ MM_sum10 + diffMM_10 +
                               tried_sum10 + difftried_10 +
                               battles_sum10 + diffbattle_10 +
                               degree +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

model_11_censor <- as.formula(firsttrial ~ MM_sum10 + diffMM_10 +
                               tried_sum10 + difftried_10 +
                               battles_sum10 + diffbattle_10 +
                               between +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

model_12_censor <- as.formula(firsttrial ~ MM_sum10 + diffMM_10 +
                               tried_sum10 + difftried_10 +
                               battles_sum10 + diffbattle_10 +
                               eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

# Sys.time()
fit_model_10_censor <- brm(model_10_censor,
                       data = surv_sample_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
# Sys.time() #  40 min
saveRDS(fit_model_10_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_10_censor.rds"))
# fit_model_10_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_10_censor.rds"))

# Sys.time()
fit_model_11_censor <- brm(model_11_censor,
                       data = surv_sample_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
# Sys.time() # 1 hr
saveRDS(fit_model_11_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_11_censor.rds"))
# fit_model_11_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_11_censor.rds"))
# Sys.time()
fit_model_12_censor <- brm(model_12_censor,
                       data = surv_sample_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
# Sys.time() # 1 hr
saveRDS(fit_model_12_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_12_censor.rds"))
# fit_model_12_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_12_censor.rds"))
```

```{r robust_specifications_population, eval= TRUE, echo=FALSE}
# Does controlling for population, as a measure of economic development, improve the models?

with(tradenet_all, cor(pop1500, degreeCentrality031622, use = "complete.obs"))
# relatively correlated - 0.39
# Values for 288 cities - of the 553 in our sample - is missingness correlated with something important? Eg centrality...
table(!is.na(tradenet_all$degreeCentrality031622), !is.na(tradenet_all$pop1500))
# 300 cities with both; 277 missing centrality
with(tradenet_all, summary(degreeCentrality031622))
with(tradenet_all[!is.na(tradenet_all$pop1500),], summary(degreeCentrality031622))
# all values higher, except max: will lose less central cities if limit by availability of population

model_13_censor <- as.formula(firsttrial ~ MM_sum10 + diffMM_10 +
                               tried_sum10 + difftried_10 + 
                               battles_sum10 + diffbattle_10 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               logpop1500 +
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

surv_sample_pop <- surv_dataset_net[names(surv_dataset_net) %in% c(model_vars, "logpop1500")]
surv_sample_pop <- surv_sample_pop[complete.cases(surv_sample_pop),]
surv_sample_pop <- filter(surv_sample_pop, year>=1400&year<1651)
surv_sample_pop_scaled <- surv_sample_pop %>%
  mutate(across(all_of(scale_vars), ~scale(.) %>% as.vector))

# Sys.time()
fit_model_13_censor <- brm(model_13_censor,
                       data = surv_sample_pop_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
# Sys.time()
saveRDS(fit_model_13_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_13_censor.rds"))
# fit_model_13_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_13_censor.rds"))

var.order.m13 <- c("b_MM_sum10", "b_diffMM_10",
                  "b_tried_sum10", "b_difftried_10",
                  "b_degree", "b_between", "b_eigen",
                  "b_battles_sum10", "b_diffbattle_10", 
                  "b_temp_adj_lag", "b_prc_adj_lag",
                  "b_hanse_pagel", "b_freeimp_jacob", "b_cath_cheney",
                  "b_logpop1500",
                  "b_grid_0")
latex.label.order.m13 <- c("Malleus editions, past 10 years", 
                        "Malleus editions, distance-weighted",
                        "Total persons tried, past 10 years", "Total persons tried, distance-weighted", 
                        "Degree", "Betweenness", "Eigenvector",
                        "Battles, past 10 years", "Battles, distance-weighted",
                        "Relative temp.", "Relative precip.",
                        "Hanse member", "Free or imperial", "Catholic seat",
                        "Log population in 1500",
                        "Low coverage area")
posterior_m13 <- as_draws_array(fit_model_13_censor)
m13_table_raw <- summarize_draws(posterior_m13, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m13_table <- m13_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m13, "b_Intercept")) %>%
  slice(match(c(var.order.m13, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m13, "Intercept")) %>%
  column_to_rownames(var = "variable")
write.csv(round(m13_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model13.csv"), row.names=TRUE)
```

```{r robust_specifications_timetrend, eval= TRUE, echo=FALSE}
# Is there a general time trend that the temporal lags are detecting?
# Exploration above would indicate no...

model_vars_time <- c("period", "period_1", "period_2", "period_3", 
               "period_4", "period_5", "period_6", "period_7", "period_8", 
               "period_9", "period_10", "period_11", "period_12", "period_13", 
               "period_14", "period_15", "period_16", "period_17", "period_19", 
               "period_19", "period_20", "period_21", "period_22", "period_23", 
               "period_24", "period_25", "period_26", "period_27", "period_28", 
               "period_29", "period_30", "period_31", "period_32", "period_33", 
               "period_34", "period_35",
               "baseline_ann", "baseline_lag1", "baseline_mean7", 
               "baseline_pd", "baseline_pd_lag1", "baseline_pd_mean3")
surv_sample_time <- surv_dataset_net[names(surv_dataset_net) %in% c(model_vars, model_vars_time)]
surv_sample_time <- surv_sample_time[complete.cases(surv_sample_time),]
surv_sample_time <- filter(surv_sample_time, year>=1400&year<1651)
surv_sample_time_scaled <- surv_sample_time %>%
  mutate(across(all_of(scale_vars), ~scale(.) %>% as.vector))


model_14_censor <- as.formula(firsttrial ~ MM_sum10 + diffMM_10 +
                               tried_sum10 + difftried_10 + 
                               battles_sum10 + diffbattle_10 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               period + # time trend continuous effects
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

model_15_censor <- as.formula(firsttrial ~ MM_sum10 + diffMM_10 +
                               tried_sum10 + difftried_10 + 
                               battles_sum10 + diffbattle_10 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               # time trend fixed effects
                               period_1 + period_2 + period_3 + period_4 + 
                                period_5 + period_6 + period_7 + 
                                period_8 + period_9 + period_10 +
                                period_11 + period_12 + period_13 + period_14 + 
                                period_15 + period_16 + period_17 + 
                                period_18 + period_19 + period_20 + 
                                period_21 + period_22 + period_23 + period_24 + 
                                period_25 + period_26 + period_27 + 
                                period_28 + period_29 + period_30 +
                                period_13 + period_32 + period_33 + period_34 +
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects
# last period as referent because first period as referent just picks up that trials were more likely in basically any period than the first one.

model_16_censor <- as.formula(firsttrial ~ MM_sum10 + diffMM_10 +
                               tried_sum10 + difftried_10 + 
                               battles_sum10 + diffbattle_10 +
                               degree + between + eigen +
                               hanse_pagel +  # gov capacity
                               freeimp_jacob + cath_cheney + 
                               temp_adj_lag + prc_adj_lag + # climate
                               # time trend as rolling hazard in periods
                               baseline_pd_mean3 + 
                               grid_0 + # region fixed effects
                               (1|bhpr_runningnr))  # city random effects

# Sys.time()
fit_model_14_censor <- brm(model_14_censor,
                       data = surv_sample_time_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
Sys.time() #  40 min
saveRDS(fit_model_14_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_14_censor.rds"))
# fit_model_14_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_14_censor.rds"))
# Sys.time()
fit_model_15_censor <- brm(model_15_censor,
                       data = surv_sample_time_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
Sys.time() #  55 min
saveRDS(fit_model_15_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_15_censor.rds"))
# fit_model_15_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_15_censor.rds"))
Sys.time()
fit_model_16_censor <- brm(model_16_censor,
                       data = surv_sample_time_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
Sys.time() #  40 min
saveRDS(fit_model_16_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_16_censor.rds"))
# fit_model_16_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_16_censor.rds"))

var.order.m14 <- c("b_MM_sum10", "b_diffMM_10",
                  "b_tried_sum10", "b_difftried_10",
                  "b_degree", "b_between", "b_eigen",
                  "b_battles_sum10", "b_diffbattle_10", 
                  "b_temp_adj_lag", "b_prc_adj_lag",
                  "b_hanse_pagel", "b_freeimp_jacob", "b_cath_cheney",
                  "b_grid_0")
latex.label.order.m14 <- c("Malleus editions, past 10 years", 
                        "Malleus editions, distance-weighted",
                        "Total persons tried, past 10 years", "Total persons tried, distance-weighted", 
                        "Degree", "Betweenness", "Eigenvector",
                        "Battles, past 10 years", "Battles, distance-weighted",
                        "Relative temp.", "Relative precip.",
                        "Hanse member", "Free or imperial", "Catholic seat",
                        "Low coverage area")
posterior_m14 <- as_draws_array(fit_model_14_censor)
m14_table_raw <- summarize_draws(posterior_m14, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)
posterior_m15 <- as_draws_array(fit_model_15_censor)
m15_table_raw <- summarize_draws(posterior_m15, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)
posterior_m16 <- as_draws_array(fit_model_16_censor)
m16_table_raw <- summarize_draws(posterior_m16, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m14_table <- m14_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m14, "b_Intercept")) %>%
  slice(match(c(var.order.m14, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m14, "Intercept")) %>%
  column_to_rownames(var = "variable")
write.csv(round(m14_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model14.csv"), row.names=TRUE)

m15_table <- m15_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m14, "b_Intercept")) %>%
  slice(match(c(var.order.m14, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m14, "Intercept")) %>%
  column_to_rownames(var = "variable")
write.csv(round(m15_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model15.csv"), row.names=TRUE)

m16_table <- m16_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m14, "b_baseline_pd_mean3", "b_Intercept")) %>%
  slice(match(c(var.order.m14, "b_baseline_pd_mean3", "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m14, "Rolling hazard", "Intercept")) %>%
  column_to_rownames(var = "variable")
write.csv(round(m16_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model16.csv"), row.names=TRUE)
```

  The Malleus editions relationship is knocked out by m14, with the continuous time trend, but not by the period fixed effects (m15) or by  the adjusting hazard (m16). Essentially, what happens with adding different time trends is that the intercept becomes more negative. Also, the coefficient for Malleus editions is twice as large in m15 as in the main model or the other time trend models.

```{r robust_specifications_USTCext, eval= TRUE, echo=FALSE}
# USTC was extended to 1700. How do results compare when using the extended data?

surv_sample_ext_scaled <- surv_sample_ext %>%
  mutate(across(all_of(scale_vars), ~scale(.) %>% as.vector))

# Sys.time()
fit_model_17_censor <- brm(model_1_censor,
                       data = surv_sample_ext_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
# Sys.time()
saveRDS(fit_model_17_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_17_censor.rds"))
# fit_model_17_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_17_censor.rds"))

fit_model_18_censor <- brm(model_2_censor,
                       data = surv_sample_ext_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
Sys.time()
saveRDS(fit_model_18_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_18_censor.rds"))
# fit_model_18_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_18_censor.rds"))

fit_model_19_censor <- brm(model_3_censor,
                       data = surv_sample_ext_scaled,
                       bernoulli(link = "logit"),
                       prior = prior_model_1,
                       chains = 4, iter = 4000, warmup=1000, seed=1487,
                       control = list(adapt_delta = 0.9),
                       cores = parallel::detectCores())
Sys.time()
saveRDS(fit_model_19_censor, here::here("Witchtrial_diffusion","out_data", "fit_model_19_censor.rds"))
# fit_model_19_censor <- readRDS(here::here("Witchtrial_diffusion","out_data", "fit_model_19_censor.rds"))

posterior_m17 <- as_draws_array(fit_model_17_censor)
m17_table_raw <- summarize_draws(posterior_m17, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m17_table <- m17_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m1, "b_Intercept")) %>%
  slice(match(c(var.order.m1, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m1, "Intercept")) %>%
  column_to_rownames(var = "variable")
write.csv(round(m17_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model17.csv"), row.names=TRUE)

posterior_m18 <- as_draws_array(fit_model_18_censor)
m18_table_raw <- summarize_draws(posterior_m18, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m18_table <- m18_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m2, "b_Intercept")) %>%
  slice(match(c(var.order.m2, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m2, "Intercept")) %>%
  column_to_rownames(var = "variable")
write.csv(round(m18_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model18.csv"), row.names=TRUE)

posterior_m19 <- as_draws_array(fit_model_19_censor)
m19_table_raw <- summarize_draws(posterior_m19, mean, mcse_mean, ~quantile(.x, probs = c(0.055, 0.945)), rhat, ess_bulk, ess_tail)

m19_table <- m19_table_raw %>%
  as_tibble() %>%
  dplyr::filter(variable %in% c(var.order.m3, "b_Intercept")) %>%
  slice(match(c(var.order.m3, "b_Intercept"), variable)) %>%
  mutate(variable=c(latex.label.order.m3, "Intercept")) %>%
  column_to_rownames(var = "variable")
write.csv(round(m19_table,2), here::here("Witchtrial_diffusion","out_tables","table_aa_model19.csv"), row.names=TRUE)
```

## Extra
 
 
 Table of descriptives for scaled data, all years  
   
```{r descriptives_scaled_setup, eval=TRUE, echo=FALSE}
# all possible variables...

descr_surv_net_scaled <- sapply(surv_sample_scaled, 
                    function(x) rbind(count = length(which(!is.na(x))), 
                                      minimum = min(as.numeric(x), na.rm=TRUE),
                                      maximum = max(as.numeric(x), na.rm=TRUE),
                                      mean = mean(as.numeric(x), na.rm=TRUE),
                                      median = median(as.numeric(x), na.rm=TRUE),
                                      stdev = sd(as.numeric(x), na.rm=TRUE)))
descr_surv_net_scaled <- round(t(descr_surv_net_scaled), 2)
colnames(descr_surv_net_scaled) <- c("n","Min","Max","Mean","Median","St.Dev")
rownames(descr_surv_net_scaled) <- dimnames(descr_surv_net_scaled)[[1]]

table_aa <- descr_surv_net_scaled[table_2_vars,]
rownames(table_aa) <- table_2_names
write.csv(table_aa, here::here("Witchtrial_diffusion", "out_data", "descriptives_surv_scaled_table.csv"), row.names=TRUE)

table_aa
```
```{r table_aa_descriptives_scaled, eval=FALSE, results='asis', echo=FALSE}
stargazer(t(descr_scaled[,c(table_1_vars, table_2_vars, "temp_lag1")]), digits=2, type = "html", summary = FALSE, align=TRUE, out=here::here("Witchtrial_diffusion","out_tables","descriptives_scaled.htm"))
```


***
## R Session Info
```{r session_info}
sessionInfo()
```
